1 package org.apache.lucene.util;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 import java.io.IOException;
21 import java.util.Arrays;
22
23 import org.apache.lucene.search.DocIdSetIterator;
24
25
26
27
28
29
30
31
32
33
34
35
36 public class SparseFixedBitSet extends BitSet implements Bits, Accountable {
37
38 private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SparseFixedBitSet.class);
39 private static final long SINGLE_ELEMENT_ARRAY_BYTES_USED = RamUsageEstimator.sizeOf(new long[1]);
40 private static final int MASK_4096 = (1 << 12) - 1;
41
42 private static int blockCount(int length) {
43 int blockCount = length >>> 12;
44 if ((blockCount << 12) < length) {
45 ++blockCount;
46 }
47 assert (blockCount << 12) >= length;
48 return blockCount;
49 }
50
51 final long[] indices;
52 final long[][] bits;
53 final int length;
54 int nonZeroLongCount;
55 long ramBytesUsed;
56
57
58
59 public SparseFixedBitSet(int length) {
60 if (length < 1) {
61 throw new IllegalArgumentException("length needs to be >= 1");
62 }
63 this.length = length;
64 final int blockCount = blockCount(length);
65 indices = new long[blockCount];
66 bits = new long[blockCount][];
67 ramBytesUsed = BASE_RAM_BYTES_USED
68 + RamUsageEstimator.shallowSizeOf(indices)
69 + RamUsageEstimator.shallowSizeOf(bits);
70 }
71
72 @Override
73 public int length() {
74 return length;
75 }
76
77 private boolean consistent(int index) {
78 assert index >= 0 && index < length : "index=" + index + ",length=" + length;
79 return true;
80 }
81
82 @Override
83 public int cardinality() {
84 int cardinality = 0;
85 for (long[] bitArray : bits) {
86 if (bitArray != null) {
87 for (long bits : bitArray) {
88 cardinality += Long.bitCount(bits);
89 }
90 }
91 }
92 return cardinality;
93 }
94
95 @Override
96 public int approximateCardinality() {
97
98
99
100 final int totalLongs = (length + 63) >>> 6;
101 assert totalLongs >= nonZeroLongCount;
102 final int zeroLongs = totalLongs - nonZeroLongCount;
103
104 final long estimate = Math.round(totalLongs * Math.log((double) totalLongs / zeroLongs));
105 return (int) Math.min(length, estimate);
106 }
107
108 @Override
109 public boolean get(int i) {
110 assert consistent(i);
111 final int i4096 = i >>> 12;
112 final long index = indices[i4096];
113 final int i64 = i >>> 6;
114
115
116 if ((index & (1L << i64)) == 0) {
117 return false;
118 }
119
120
121
122
123 final long bits = this.bits[i4096][Long.bitCount(index & ((1L << i64) - 1))];
124 return (bits & (1L << i)) != 0;
125 }
126
127 private static int oversize(int s) {
128 int newSize = s + (s >>> 1);
129 if (newSize > 50) {
130 newSize = 64;
131 }
132 return newSize;
133 }
134
135
136
137
138 public void set(int i) {
139 assert consistent(i);
140 final int i4096 = i >>> 12;
141 final long index = indices[i4096];
142 final int i64 = i >>> 6;
143 if ((index & (1L << i64)) != 0) {
144
145
146
147 bits[i4096][Long.bitCount(index & ((1L << i64) - 1))] |= 1L << i;
148 } else if (index == 0) {
149
150
151 insertBlock(i4096, i64, i);
152 } else {
153
154
155
156 insertLong(i4096, i64, i, index);
157 }
158 }
159
160 private void insertBlock(int i4096, int i64, int i) {
161 indices[i4096] = 1L << i64;
162 assert bits[i4096] == null;
163 bits[i4096] = new long[] { 1L << i };
164 ++nonZeroLongCount;
165 ramBytesUsed += SINGLE_ELEMENT_ARRAY_BYTES_USED;
166 }
167
168 private void insertLong(int i4096, int i64, int i, long index) {
169 indices[i4096] |= 1L << i64;
170
171
172 final int o = Long.bitCount(index & ((1L << i64) - 1));
173 final long[] bitArray = bits[i4096];
174 if (bitArray[bitArray.length - 1] == 0) {
175
176
177 System.arraycopy(bitArray, o, bitArray, o + 1, bitArray.length - o - 1);
178 bitArray[o] = 1L << i;
179 } else {
180
181 final int newSize = oversize(bitArray.length + 1);
182 final long[] newBitArray = new long[newSize];
183 System.arraycopy(bitArray, 0, newBitArray, 0, o);
184 newBitArray[o] = 1L << i;
185 System.arraycopy(bitArray, o, newBitArray, o + 1, bitArray.length - o);
186 bits[i4096] = newBitArray;
187 ramBytesUsed += RamUsageEstimator.sizeOf(newBitArray) - RamUsageEstimator.sizeOf(bitArray);
188 }
189 ++nonZeroLongCount;
190 }
191
192
193
194
195 public void clear(int i) {
196 assert consistent(i);
197 final int i4096 = i >>> 12;
198 final int i64 = i >>> 6;
199 and(i4096, i64, ~(1L << i));
200 }
201
202 private void and(int i4096, int i64, long mask) {
203 final long index = indices[i4096];
204 if ((index & (1L << i64)) != 0) {
205
206 final int o = Long.bitCount(index & ((1L << i64) - 1));
207 long bits = this.bits[i4096][o] & mask;
208 if (bits == 0) {
209 removeLong(i4096, i64, index, o);
210 } else {
211 this.bits[i4096][o] = bits;
212 }
213 }
214 }
215
216 private void removeLong(int i4096, int i64, long index, int o) {
217 index &= ~(1L << i64);
218 indices[i4096] = index;
219 if (index == 0) {
220
221 this.bits[i4096] = null;
222 } else {
223 final int length = Long.bitCount(index);
224 final long[] bitArray = bits[i4096];
225 System.arraycopy(bitArray, o + 1, bitArray, o, length - o);
226 bitArray[length] = 0L;
227 }
228 nonZeroLongCount -= 1;
229 }
230
231 @Override
232 public void clear(int from, int to) {
233 assert from >= 0;
234 assert to <= length;
235 if (from >= to) {
236 return;
237 }
238 final int firstBlock = from >>> 12;
239 final int lastBlock = (to - 1) >>> 12;
240 if (firstBlock == lastBlock) {
241 clearWithinBlock(firstBlock, from & MASK_4096, (to - 1) & MASK_4096);
242 } else {
243 clearWithinBlock(firstBlock, from & MASK_4096, MASK_4096);
244 for (int i = firstBlock + 1; i < lastBlock; ++i) {
245 nonZeroLongCount -= Long.bitCount(indices[i]);
246 indices[i] = 0;
247 bits[i] = null;
248 }
249 clearWithinBlock(lastBlock, 0, (to - 1) & MASK_4096);
250 }
251 }
252
253
254 private static long mask(int from, int to) {
255 return ((1L << (to - from) << 1) - 1) << from;
256 }
257
258 private void clearWithinBlock(int i4096, int from, int to) {
259 int firstLong = from >>> 6;
260 int lastLong = to >>> 6;
261
262 if (firstLong == lastLong) {
263 and(i4096, firstLong, ~mask(from, to));
264 } else {
265 assert firstLong < lastLong;
266 and(i4096, lastLong, ~mask(0, to));
267 for (int i = lastLong - 1; i >= firstLong + 1; --i) {
268 and(i4096, i, 0L);
269 }
270 and(i4096, firstLong, ~mask(from, 63));
271 }
272 }
273
274
275 private int firstDoc(int i4096) {
276 long index = 0;
277 while (i4096 < indices.length) {
278 index = indices[i4096];
279 if (index != 0) {
280 final int i64 = Long.numberOfTrailingZeros(index);
281 return (i4096 << 12) | (i64 << 6) | Long.numberOfTrailingZeros(bits[i4096][0]);
282 }
283 i4096 += 1;
284 }
285 return DocIdSetIterator.NO_MORE_DOCS;
286 }
287
288 @Override
289 public int nextSetBit(int i) {
290 assert i < length;
291 final int i4096 = i >>> 12;
292 final long index = indices[i4096];
293 final long[] bitArray = this.bits[i4096];
294 int i64 = i >>> 6;
295 int o = Long.bitCount(index & ((1L << i64) - 1));
296 if ((index & (1L << i64)) != 0) {
297
298
299 final long bits = bitArray[o] >>> i;
300 if (bits != 0) {
301 return i + Long.numberOfTrailingZeros(bits);
302 }
303 o += 1;
304 }
305 final long indexBits = index >>> i64 >>> 1;
306 if (indexBits == 0) {
307
308 return firstDoc(i4096 + 1);
309 }
310
311 i64 += 1 + Long.numberOfTrailingZeros(indexBits);
312 final long bits = bitArray[o];
313 return (i64 << 6) | Long.numberOfTrailingZeros(bits);
314 }
315
316
317 private int lastDoc(int i4096) {
318 long index;
319 while (i4096 >= 0) {
320 index = indices[i4096];
321 if (index != 0) {
322 final int i64 = 63 - Long.numberOfLeadingZeros(index);
323 final long bits = this.bits[i4096][Long.bitCount(index) - 1];
324 return (i4096 << 12) | (i64 << 6) | (63 - Long.numberOfLeadingZeros(bits));
325 }
326 i4096 -= 1;
327 }
328 return -1;
329 }
330
331 @Override
332 public int prevSetBit(int i) {
333 assert i >= 0;
334 final int i4096 = i >>> 12;
335 final long index = indices[i4096];
336 final long[] bitArray = this.bits[i4096];
337 int i64 = i >>> 6;
338 final long indexBits = index & ((1L << i64) - 1);
339 final int o = Long.bitCount(indexBits);
340 if ((index & (1L << i64)) != 0) {
341
342
343 final long bits = bitArray[o] & ((1L << i << 1) - 1);
344 if (bits != 0) {
345 return (i64 << 6) | (63 - Long.numberOfLeadingZeros(bits));
346 }
347 }
348 if (indexBits == 0) {
349
350
351 return lastDoc(i4096 - 1);
352 }
353
354 i64 = 63 - Long.numberOfLeadingZeros(indexBits);
355 final long bits = bitArray[o - 1];
356 return (i4096 << 12) | (i64 << 6) | (63 - Long.numberOfLeadingZeros(bits));
357 }
358
359
360 private long longBits(long index, long[] bits, int i64) {
361 if ((index & (1L << i64)) == 0) {
362 return 0L;
363 } else {
364 return bits[Long.bitCount(index & ((1L << i64) - 1))];
365 }
366 }
367
368 private void or(final int i4096, final long index, long[] bits, int nonZeroLongCount) {
369 assert Long.bitCount(index) == nonZeroLongCount;
370 final long currentIndex = indices[i4096];
371 if (currentIndex == 0) {
372
373
374 indices[i4096] = index;
375 this.bits[i4096] = Arrays.copyOf(bits, nonZeroLongCount);
376 this.nonZeroLongCount += nonZeroLongCount;
377 return;
378 }
379 final long[] currentBits = this.bits[i4096];
380 final long[] newBits;
381 final long newIndex = currentIndex | index;
382 final int requiredCapacity = Long.bitCount(newIndex);
383 if (currentBits.length >= requiredCapacity) {
384 newBits = currentBits;
385 } else {
386 newBits = new long[oversize(requiredCapacity)];
387 }
388
389
390 for (int i = Long.numberOfLeadingZeros(newIndex), newO = Long.bitCount(newIndex) - 1;
391 i < 64;
392 i += 1 + Long.numberOfLeadingZeros(newIndex << (i + 1)), newO -= 1) {
393
394 final int bitIndex = 63 - i;
395 assert newO == Long.bitCount(newIndex & ((1L << bitIndex) - 1));
396 newBits[newO] = longBits(currentIndex, currentBits, bitIndex) | longBits(index, bits, bitIndex);
397 }
398 indices[i4096] = newIndex;
399 this.bits[i4096] = newBits;
400 this.nonZeroLongCount += nonZeroLongCount - Long.bitCount(currentIndex & index);
401 }
402
403 private void or(SparseFixedBitSet other) {
404 for (int i = 0; i < other.indices.length; ++i) {
405 final long index = other.indices[i];
406 if (index != 0) {
407 or(i, index, other.bits[i], Long.bitCount(index));
408 }
409 }
410 }
411
412
413
414
415 private void orDense(DocIdSetIterator it) throws IOException {
416 assertUnpositioned(it);
417
418
419
420 final int firstDoc = it.nextDoc();
421 if (firstDoc == DocIdSetIterator.NO_MORE_DOCS) {
422 return;
423 }
424 int i4096 = firstDoc >>> 12;
425 int i64 = firstDoc >>> 6;
426 long index = 1L << i64;
427 long currentLong = 1L << firstDoc;
428
429 long[] longs = new long[64];
430 int numLongs = 0;
431
432 for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
433 final int doc64 = doc >>> 6;
434 if (doc64 == i64) {
435
436 currentLong |= 1L << doc;
437 } else {
438 longs[numLongs++] = currentLong;
439
440 final int doc4096 = doc >>> 12;
441 if (doc4096 == i4096) {
442 index |= 1L << doc64;
443 } else {
444
445 or(i4096, index, longs, numLongs);
446
447 i4096 = doc4096;
448 index = 1L << doc64;
449 numLongs = 0;
450 }
451
452
453 i64 = doc64;
454 currentLong = 1L << doc;
455 }
456 }
457
458
459 longs[numLongs++] = currentLong;
460 or(i4096, index, longs, numLongs);
461 }
462
463 @Override
464 public void or(DocIdSetIterator it) throws IOException {
465 {
466
467 final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it);
468 if (other != null) {
469 assertUnpositioned(it);
470 or(other);
471 return;
472 }
473 }
474
475
476
477
478
479
480 if (it.cost() < indices.length) {
481
482 super.or(it);
483 } else {
484 orDense(it);
485 }
486 }
487
488
489
490
491
492 @Override
493 public void and(DocIdSetIterator it) throws IOException {
494 final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it);
495 if (other != null) {
496
497
498
499
500
501 final int numCommonBlocks = Math.min(indices.length, other.indices.length);
502 for (int i = 0; i < numCommonBlocks; ++i) {
503 if ((indices[i] & other.indices[i]) == 0) {
504 this.nonZeroLongCount -= Long.bitCount(this.indices[i]);
505 this.indices[i] = 0;
506 this.bits[i] = null;
507 }
508 }
509 }
510 super.and(it);
511 }
512
513 @Override
514 public long ramBytesUsed() {
515 return ramBytesUsed;
516 }
517
518 @Override
519 public String toString() {
520 return "SparseFixedBitSet(size=" + length + ",cardinality=~" + approximateCardinality();
521 }
522 }